library(rlang)
#library(SingleR)
library(Seurat)
## Loading required package: SeuratObject
## Loading required package: sp
## 
## Attaching package: 'SeuratObject'
## The following objects are masked from 'package:base':
## 
##     intersect, t
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats   1.0.0     ✔ readr     2.1.5
## ✔ ggplot2   3.5.1     ✔ stringr   1.5.1
## ✔ lubridate 1.9.4     ✔ tibble    3.2.1
## ✔ purrr     1.0.2     ✔ tidyr     1.3.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ purrr::%@%()         masks rlang::%@%()
## ✖ dplyr::filter()      masks stats::filter()
## ✖ purrr::flatten()     masks rlang::flatten()
## ✖ purrr::flatten_chr() masks rlang::flatten_chr()
## ✖ purrr::flatten_dbl() masks rlang::flatten_dbl()
## ✖ purrr::flatten_int() masks rlang::flatten_int()
## ✖ purrr::flatten_lgl() masks rlang::flatten_lgl()
## ✖ purrr::flatten_raw() masks rlang::flatten_raw()
## ✖ purrr::invoke()      masks rlang::invoke()
## ✖ dplyr::lag()         masks stats::lag()
## ✖ purrr::splice()      masks rlang::splice()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
options(future.globals.maxSize = 8000 * 1024^2)
source("../../configuration.R")
obj <- readRDS(STConfig$seurat_object_rds)
df <- read.csv(STConfig$annotation_with_stromal, row.names = 1)
head(df)
#merge in the singleR_stromal_w_megs into obj@metadata
new_df = obj@meta.data

# Check if all row names of df1 are in df2
common_row_names <- intersect(rownames(new_df), rownames(df))
#it's cos they are artefacts - don't worry about them!
#subset new_df to the common row names
new_df = new_df[common_row_names, ]

#subset obj to the common row names
obj = subset(obj, cell_id %in% common_row_names)
anno_df = read.csv(STConfig$pth_cell_annotations_final, row.names = 1)
anno_df = anno_df[common_row_names, ]
#subset to index and obj.anno_3_w_megs
sub_anno_df = anno_df[, c("obj.anno_3_w_megs_w_stromal","obj.anno_3_w_megs", "obj.anno_1_w_megs", "obj.anno_5_w_megs")]
head(sub_anno_df)
#merge in the singleR_stromal_w_megs into obj@metadata

library(dplyr)

df1 <- new_df %>% tibble::rownames_to_column(var = "Index")
df2 <- sub_anno_df %>% tibble::rownames_to_column(var = "Index")

# Merge by the Index column
merged_df <- df1 %>%
  left_join(df2, by = "Index") %>%
  tibble::column_to_rownames(var = "Index")  # Restore row names

# Check the result
head(merged_df)
all(rownames(merged_df) == rownames(obj@meta.data))
## [1] TRUE
obj$obj.anno_3_w_megs = merged_df$obj.anno_3_w_megs
obj$obj.anno_3_w_megs_w_stromal = merged_df$obj.anno_3_w_megs_w_stromal
obj$obj.anno_1_w_megs = merged_df$obj.anno_1_w_megs
obj$obj.anno_5_w_megs = merged_df$obj.anno_5_w_megs
#downsample to 200 cells
Idents(obj) = obj$sample_id
#ds_obj = subset(obj, downsample = 10000)

rm(df, df1, df2, new_df, merged_df)
gc()
##             used   (Mb) gc trigger   (Mb)   max used   (Mb)
## Ncells   4151663  221.8   22304570 1191.2   15031563  802.8
## Vcells 426734592 3255.8 1165898015 8895.1 1160479646 8853.8
`%!in%` = Negate(`%in%`)

unique(obj$obj.anno_5_w_megs)
##  [1] "MNP"              "Immature_myeloid" "Stromal"          "Myeloid"         
##  [5] "Lymphocyte"       "Erythroid"        "HSPC"             "Endothelial"     
##  [9] "Megakaryocyte"    "Granulocyte/mast" "CD69"
obj = subset(obj, subset = obj.anno_5_w_megs %!in% c("CD69"))
Idents(obj) = obj$obj.anno_3_w_megs
#now make a dimplot of each level and a proportion table
image = DimPlot(obj, group.by = "obj.anno_3_w_megs", label = TRUE, repel = TRUE,size=0.05) +
  NoLegend() +
  theme(
    axis.title = element_blank(),  # Removes axis titles
    axis.text = element_blank(),   # Removes axis text
    axis.ticks = element_blank(),  # Removes axis ticks
    axis.line = element_blank(),   # Removes axis lines
    panel.grid = element_blank()   # Removes grid lines if present
  ) +
  ggtitle("")
## Rasterizing points since number of points exceeds 100,000.
## To disable this behavior set `raster=FALSE`
image

Idents(obj) = obj$obj.anno_3_w_megs_w_stromal
#now make a dimplot of each level and a proportion table
image = DimPlot(obj, group.by = "obj.anno_3_w_megs_w_stromal", label = TRUE, repel = TRUE, ) +
  NoLegend() +
  theme(
    axis.title = element_blank(),  # Removes axis titles
    axis.text = element_blank(),   # Removes axis text
    axis.ticks = element_blank(),  # Removes axis ticks
    axis.line = element_blank(),   # Removes axis lines
    panel.grid = element_blank()   # Removes grid lines if present
  ) +
  ggtitle("")
## Rasterizing points since number of points exceeds 100,000.
## To disable this behavior set `raster=FALSE`
print(image)

Idents(obj) = obj$obj.anno_1_w_megs
#now make a dimplot of each level and a proportion table
image = DimPlot(obj, group.by = "obj.anno_1_w_megs", label = TRUE, repel = TRUE, ) +
  NoLegend() +
  theme(
    axis.title = element_blank(),  # Removes axis titles
    axis.text = element_blank(),   # Removes axis text
    axis.ticks = element_blank(),  # Removes axis ticks
    axis.line = element_blank(),   # Removes axis lines
    panel.grid = element_blank()   # Removes grid lines if present
  ) +
  ggtitle("")
## Rasterizing points since number of points exceeds 100,000.
## To disable this behavior set `raster=FALSE`
image

Idents(obj) = obj$obj.anno_5_w_megs
#now make a dimplot of each level and a proportion table
image = DimPlot(obj, group.by = "obj.anno_5_w_megs", label = TRUE, repel = TRUE, ) +
  NoLegend() +
  theme(
    axis.title = element_blank(),  # Removes axis titles
    axis.text = element_blank(),   # Removes axis text
    axis.ticks = element_blank(),  # Removes axis ticks
    axis.line = element_blank(),   # Removes axis lines
    panel.grid = element_blank()   # Removes grid lines if present
  ) +
  ggtitle("")
## Rasterizing points since number of points exceeds 100,000.
## To disable this behavior set `raster=FALSE`
image

DEG Heatmap

x <- unique(obj$obj.anno_3_w_megs_w_stromal)

#do differential expression for each cluster

Idents(obj) = obj$obj.anno_3_w_megs_w_stromal

pbmc.markers <- FindAllMarkers(obj, only.pos = TRUE)
## Calculating cluster DC
## Calculating cluster GMP
## Calculating cluster Macrophage
## Calculating cluster Monocyte
## Calculating cluster Adipo-MSC
## Calculating cluster Stromal
## Calculating cluster Myeloid
## Calculating cluster Plasma_cell
## Calculating cluster Erythroid
## Calculating cluster T_cell
## Calculating cluster HSPC
## Calculating cluster Osteo-MSC
## Calculating cluster B_cell
## Calculating cluster Endothelial
## Calculating cluster SMC
## Calculating cluster Megakaryocyte
## Calculating cluster Granulocyte/mast
pbmc.markers %>%
    group_by(cluster) %>%
    dplyr::filter(avg_log2FC > 1)
pbmc.markers %>%
    group_by(cluster) %>%
    dplyr::filter(avg_log2FC > 1) %>%
    slice_head(n = 10) %>%
    ungroup() -> top10

p <- top10 %>%
  mutate(cluster=factor(cluster, levels = x)) %>%
  arrange(cluster)

p
p$cluster = factor(p$cluster, levels = unique(obj$obj.anno_3_w_megs_w_stromal))

avgexp = AggregateExpression(obj, return.seurat = T)
## Names of identity class contain underscores ('_'), replacing with dashes ('-')
## This message is displayed once every 8 hours.
## Centering and scaling data matrix
image = DoHeatmap(avgexp, features = p$gene, size = 2, draw.lines = FALSE) + scale_fill_gradientn(colours = c("blue", "white", "red")) 
## Scale for fill is already present.
## Adding another scale for fill, which will replace the existing scale.
plot(image)
## Warning: Removed 121 rows containing missing values or values outside the scale range
## (`geom_point()`).

#featureplot

image <- FeaturePlot(
  obj,
  features = c("CD3D", "CD3E", "CD79A", "CD14", "FCGR3A", "GYPA", "VWF", "PECAM1", "CD34", "CTSG", "KIT", "PDGFRA"),
  ncol = 3
)
## Rasterizing points since number of points exceeds 100,000.
## To disable this behavior set `raster=FALSE`
## Rasterizing points since number of points exceeds 100,000.
## To disable this behavior set `raster=FALSE`
## Rasterizing points since number of points exceeds 100,000.
## To disable this behavior set `raster=FALSE`
## Rasterizing points since number of points exceeds 100,000.
## To disable this behavior set `raster=FALSE`
## Rasterizing points since number of points exceeds 100,000.
## To disable this behavior set `raster=FALSE`
## Rasterizing points since number of points exceeds 100,000.
## To disable this behavior set `raster=FALSE`
## Rasterizing points since number of points exceeds 100,000.
## To disable this behavior set `raster=FALSE`
## Rasterizing points since number of points exceeds 100,000.
## To disable this behavior set `raster=FALSE`
## Rasterizing points since number of points exceeds 100,000.
## To disable this behavior set `raster=FALSE`
## Rasterizing points since number of points exceeds 100,000.
## To disable this behavior set `raster=FALSE`
## Rasterizing points since number of points exceeds 100,000.
## To disable this behavior set `raster=FALSE`
## Rasterizing points since number of points exceeds 100,000.
## To disable this behavior set `raster=FALSE`
print(image)